On 2020-01-14 I run the analysis of Gene Ontology terms. Here I want to focus on part of those results and make some findings clearer. I will use results only from gene annotation, and not from annotation of transcripts, which is a somewhat noiser dataset. I will not look at the functions affected by hatching condition, since it only affects a couple of genes, to be inspected later. Among all cellular functions significantly enriched with genes differentially expressed among selective regimes, I will focus on those that are most significant, and detailed enough. For example, I skip terms like transmembrane transport, proteolysis or protein phosphorylation, because they are too general. Sections below are named for the functional categories that I think are worth discussing. I will try to contextualize the results. My entry point to the literature is [GarciaRoger2019].
I need the results of the differential expression analysis (2020-01-08) to identify the genes annotated with the significant GO terms, and to assess the direction of gene expression change between selective regimes.
library(variancePartition)
library(GO.db)
library(ggplot2)
library(tidyr)
library(reactable)
ANNOTATION <- '../2019-07-26/genes/annotation.txt'
EXPRESSION <- '../2020-01-08/genes.RData' # Includes the variance partition and the mixed models fitted with dream()
The annotation table includes only the most specific terms that can be assigned to a gene. The more general, ancestor terms are assumed, of course, but not explicitly mentioned in the annotation. For the purpose of retrieving all genes annotated to a term, I need to expand the annotation, to make ancestor terms explicit. It took me a while to realize that I can do this with the unlist() function:
annotation <- read.table(ANNOTATION, col.names = c('gene', 'GOterms'), colClasses = c('character', 'character'))
head(annotation)
## gene GOterms
## 1 XLOC_000002 GO:0008417|GO:0016020|GO:0006486
## 2 XLOC_000009 GO:0043130|GO:0005515|GO:0043161
## 3 XLOC_000010 GO:0005840|GO:0015935|GO:0003735|GO:0006412
## 4 XLOC_000015 GO:0006355|GO:0003700|GO:0006357|GO:0043565|GO:0005634|GO:0032502
## 5 XLOC_000021 GO:0016567|GO:0006397|GO:0061630|GO:0008270
## 6 XLOC_000036 GO:0005272|GO:0006814|GO:0016020
annotation.list <- strsplit(annotation$GOterms, split='|', fixed=TRUE)
names(annotation.list) <- annotation$gene
# append can only join two vectors at a time. It works with lists!
allAncestors <- append(as.list(GOBPANCESTOR),
append(as.list(GOMFANCESTOR),
as.list(GOCCANCESTOR)))
fullAnnotation <- lapply(annotation.list,
FUN = function(x){
unique(append(x,
unlist(allAncestors[x], use.names=FALSE)))
})
load(EXPRESSION)
ls()
## [1] "allAncestors" "annotation" "ANNOTATION" "annotation.list"
## [5] "EXPRESSION" "fitmm" "fullAnnotation" "varPart"
## [9] "vobjDream"
head(varPart)
## population regime treatment Residuals
## XLOC_000002 0 0.7508718 0.050900336 0.1982279
## XLOC_000007 0 0.1579138 0.002683147 0.8394031
## XLOC_000008 0 0.7439376 0.008721038 0.2473413
## XLOC_000010 0 0.6887574 0.026242757 0.2849999
## XLOC_000015 0 0.6014947 0.080366812 0.3181385
## XLOC_000017 0 0.6576267 0.104331931 0.2380414
This is term GO:0006289. It is among the most significantly enriched terms, with only 13 genes annotated.
goterm <- 'GO:0006289'
genes <- names(fullAnnotation[grep(goterm, fullAnnotation, fixed=TRUE)])
# Not all genes annotated have been used in expression analysis
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes,])
topTable(fitmm, coef='regime', number=length(fitmm$F))[genes,]
## logFC AveExpr t P.Value adj.P.Val z.std
## XLOC_010263 0.4805689 9.156849 2.480289 0.038094479 0.11615796 2.073837
## XLOC_011770 0.5724437 4.342310 3.292678 0.010978090 0.09515806 2.543395
## XLOC_014514 0.5500148 8.516357 3.096622 0.014772049 0.09661390 2.437920
## XLOC_021296 0.4823525 5.543537 3.149725 0.013604111 0.09626821 2.467550
## XLOC_021489 0.5636265 5.534408 3.342938 0.010186586 0.09515806 2.569430
## XLOC_025290 0.2317128 3.904687 1.744670 0.119195126 0.21805421 1.558161
## XLOC_027639 0.8162588 7.715813 4.576715 0.001809784 0.09103989 3.119793
## XLOC_033994 0.3621536 6.282976 1.869033 0.098553396 0.19350676 1.651908
## XLOC_044202 0.6329653 6.546929 3.369857 0.009787651 0.09515806 2.583242
## XLOC_044365 0.5563076 5.723606 4.273286 0.003101038 0.09103989 2.957541
## XLOC_045112 0.4071535 6.031860 2.438428 0.040699325 0.11945631 2.046580
## XLOC_047039 0.4036392 5.504411 3.430756 0.011858741 0.09568805 2.516320
## XLOC_048239 0.4591151 3.174643 4.667442 0.001607843 0.09103989 3.154480
z <- as.data.frame(vobjDream$E[genes,])
z$gene <- row.names(z)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor(NA, levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
GE[GE$sample %in% c("X3A_S9", "X3C_S11", "X5A_S2", "X5C_S4", "X6A_S3", "X6C_S10"), 'regime'] <- 'regular'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) + geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- 'GO:0007186'
genes <- names(fullAnnotation[grep(goterm, fullAnnotation, fixed=TRUE)])
genes <- genes[genes %in% row.names(varPart)]
# I subset varPart and at the same time order the selected rows by 'regime', and the columns in the order desired.
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z <- as.data.frame(vobjDream$E[genes,])
z$gene <- row.names(z)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor(NA, levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
GE[GE$sample %in% c("X3A_S9", "X3C_S11", "X5A_S2", "X5C_S4", "X6A_S3", "X6C_S10"), 'regime'] <- 'regular'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) + geom_boxplot() + ggtitle(Term(GOTERM[goterm]))
Among the 206 genes in this category, 55 have an adjusted p-value lower or equal to 0.1. There are too many genes annotated with this function to plot all their stratified expression levels. I select the most differentially expressed ones.
z <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z <- z[row.names(z) %in% genes, ] # gene order is preserved:
reactable(z)
genes <- head(row.names(z), n=50)
GE <- GE[GE$gene %in% genes,]
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) + geom_boxplot() +
facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
Among the top 50 genes analysed from the G protein-coupled receptor signaling pathway, 44 have a reduced expression level in the unpredictable environment, and 6 show an increased expression level.
goterm <- 'GO:0007160'
genes <- names(fullAnnotation[grep(goterm, fullAnnotation, fixed=TRUE)])
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes,])
reactable(topTable(fitmm, coef='regime', number=length(fitmm$F))[genes,]) # not ordered by p-value
z <- as.data.frame(vobjDream$E[genes,])
z$gene <- row.names(z)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) + geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
Among the top 16 genes involved in cell-matrix adhesion, 14 have a reduced expression level in the unpredictable environment, and 2 show an increased expression level.
Several functions related to ion transport are significant. I wonder to what extent their significance is not dependent on a common subset of genes.
goterm_K <- 'GO:0006813'
goterm_COOH <- 'GO:0046942'
goterm_Sulf <- 'GO:0008272'
goterm_Nucl <- 'GO:1901642'
goterm_TM <- 'GO:0034220'
genes_K <- names(fullAnnotation[grep(goterm_K, fullAnnotation, fixed=TRUE)])
genes_COOH <- names(fullAnnotation[grep(goterm_COOH, fullAnnotation, fixed=TRUE)])
genes_Sulf <- names(fullAnnotation[grep(goterm_Sulf, fullAnnotation, fixed=TRUE)])
genes_Nucl <- names(fullAnnotation[grep(goterm_Nucl, fullAnnotation, fixed=TRUE)])
genes_TM <- names(fullAnnotation[grep(goterm_TM, fullAnnotation, fixed=TRUE)])
vennDiagram(vennCounts(cbind(Potassium = row.names(varPart) %in% genes_K,
Carboxilic = row.names(varPart) %in% genes_COOH,
Sulfate = row.names(varPart) %in% genes_Sulf,
Nucleoside = row.names(varPart) %in% genes_Nucl,
Transmembrane = row.names(varPart) %in% genes_TM)))
There is hardly any overlap among genes involved in the transport of these substances. The only remarkable overlap happens between potassium ion transport and ion transmembrane transport. But, still the significance of each term should be quite independent of the significance of any other term. This was expected, because the analysis of functional enrichment applied algorithms to account for the dependence structure among GO terms. I analyse them separately, below.
goterm <- goterm_K
genes <- genes_K # defined above
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
#plotPercentBars(varPart[genes,])
# I can't keep from using reactable, even if it does not display in the online version...
reactable(topTable(fitmm, coef='regime', number=length(fitmm$F))[genes,],
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
)) # not ordered by p-value, just click on the column header to sort.
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F), sort.by='logFC')
z2 <- z2[row.names(z2) %in% genes,] # I want to preserve order by logFC
z <- z[row.names(z2),]
# Defining z$gene as a factor with ordered levels will make ggplot order the genes in ascending
# logFC order.
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) + geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- goterm_TM
genes <- genes_TM # defined above.
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z <- z[row.names(z) %in% genes,] # still ordered by p-value!
reactable(z,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
# To make plots visible, I plot only the most significant terms, say at FDR=0.1.
# Again, I want to select and sort the rows of 'z' at the same time.
genes <- row.names(z)[z$adj.P.Val <= 0.1]
z2 <- z[genes[order(z[genes,'logFC'])],]
# I rename it as z2 to be consistent with previous chunks. This is part of the topTable,
# used only for sorting genes in the expression matrix, named z. Excuse the recycling
# of variable names.
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
# Defining z$gene as a factor with ordered levels will make ggplot order the genes in ascending
# logFC order.
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) + geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- goterm_Nucl
genes <- genes_Nucl
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
# I wanted the table to be ordered by FDR, but I need it now ordered by logFC:
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F), resort.by='logFC')
z2 <- z2[row.names(z2) %in% genes,]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) + geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- goterm_COOH
genes <- genes_COOH
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
# I wanted the table to be ordered by FDR, but I need it now ordered by logFC:
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F), resort.by='logFC')
z2 <- z2[row.names(z2) %in% genes,]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) + geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- 'GO:0005992'
genes <- names(fullAnnotation[grep(goterm, fullAnnotation, fixed=TRUE)])
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
# It's actually faster to re-order z2 after having selected the rows.
z2 <- z2[order(z2$logFC),]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) + geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm_cellmoti <- 'GO:0048870' # cell motility
goterm_movement <- 'GO:0003341' # cillium movement
goterm_assembly <- 'GO:0060271' # cillium assembly
genes_cellmoti <- names(fullAnnotation[grep(goterm_cellmoti, fullAnnotation, fixed=TRUE)])
genes_movement <- names(fullAnnotation[grep(goterm_movement, fullAnnotation, fixed=TRUE)])
genes_assembly <- names(fullAnnotation[grep(goterm_assembly, fullAnnotation, fixed=TRUE)])
vennDiagram(vennCounts(cbind('Cell motility' = row.names(varPart) %in% genes_cellmoti,
'Cillium movement' = row.names(varPart) %in% genes_movement,
'Cillium assembly' = row.names(varPart) %in% genes_assembly)))
goterm <- goterm_cellmoti
genes <- genes_cellmoti
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
# It's actually faster to re-order z2 after having selected the rows.
z2 <- z2[order(z2$logFC),]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) + geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- goterm_movement
genes <- genes_movement
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
# It's actually faster to re-order z2 after having selected the rows.
z2 <- z2[order(z2$logFC),]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) + geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- goterm_assembly
genes <- genes_assembly
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
# It's actually faster to re-order z2 after having selected the rows.
z2 <- z2[order(z2$logFC),]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) +
geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- 'GO:0043161'
genes <- names(fullAnnotation[grep(goterm, fullAnnotation, fixed=TRUE)])
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
# It's actually faster to re-order z2 after having selected the rows.
z2 <- z2[order(z2$logFC),]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) +
geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- 'GO:0006979'
genes <- names(fullAnnotation[grep(goterm, fullAnnotation, fixed=TRUE)])
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
# It's actually faster to re-order z2 after having selected the rows.
z2 <- z2[order(z2$logFC),]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) +
geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- 'GO:0003774'
genes <- names(fullAnnotation[grep(goterm, fullAnnotation, fixed=TRUE)])
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])],
c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
# I limit the number of genes.
z2 <- z2[z2$adj.P.Val <= 0.1,]
z2 <- z2[order(z2$logFC),]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) +
geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm_metallocarboxy <- 'GO:0004181'
goterm_metalloendopep <- 'GO:0004222'
goterm_serinetypeendo <- 'GO:0004252'
goterm_calciumcystein <- 'GO:0004198'
genes_metallocarboxy <- names(fullAnnotation[grep(goterm_metallocarboxy, fullAnnotation, fixed=TRUE)])
genes_metalloendopep <- names(fullAnnotation[grep(goterm_metalloendopep, fullAnnotation, fixed=TRUE)])
genes_serinetypeendo <- names(fullAnnotation[grep(goterm_serinetypeendo, fullAnnotation, fixed=TRUE)])
genes_calciumcystein <- names(fullAnnotation[grep(goterm_calciumcystein, fullAnnotation, fixed=TRUE)])
vennDiagram(vennCounts(cbind(Metallocarboxy. = row.names(varPart) %in% genes_metallocarboxy,
Metalloendo. = row.names(varPart) %in% genes_metalloendopep,
Serine_type = row.names(varPart) %in% genes_serinetypeendo,
Cystein_type = row.names(varPart) %in% genes_calciumcystein)))
goterm <- 'GO:0004181'
Term(GOTERM[goterm])
## GO:0004181
## "metallocarboxypeptidase activity"
genes <- names(fullAnnotation[grep(goterm, fullAnnotation, fixed=TRUE)])
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
# It's actually faster to re-order z2 after having selected the rows.
z2 <- z2[order(z2$logFC),]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) +
geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- 'GO:0004252'
Term(GOTERM[goterm])
## GO:0004252
## "serine-type endopeptidase activity"
genes <- names(fullAnnotation[grep(goterm, fullAnnotation, fixed=TRUE)])
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
z2 <- z2[z2$adj.P.Val <= 0.1,] # only significantly regulated genes
z2 <- z2[order(z2$logFC),]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) +
geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- 'GO:0004198'
Term(GOTERM[goterm])
## GO:0004198
## "calcium-dependent cysteine-type endopeptidase activity"
genes <- names(fullAnnotation[grep(goterm, fullAnnotation, fixed=TRUE)])
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
z2 <- z2[z2$adj.P.Val <= 0.13,] # only significantly regulated genes
z2 <- z2[order(z2$logFC),]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) +
geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- 'GO:0004222'
Term(GOTERM[goterm])
## GO:0004222
## "metalloendopeptidase activity"
genes <- names(fullAnnotation[grep(goterm, fullAnnotation, fixed=TRUE)])
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
z2 <- z2[z2$adj.P.Val <= 0.11,] # only significantly regulated genes
z2 <- z2[order(z2$logFC),]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) +
geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))
goterm <- 'GO:0016715'
Term(GOTERM[goterm])
## GO:0016715
## "oxidoreductase activity, acting on paired donors, with incorporation or reduction of molecular oxygen, reduced ascorbate as one donor, and incorporation of one atom of oxygen"
genes <- names(fullAnnotation[grep(goterm, fullAnnotation, fixed=TRUE)])
genes <- genes[genes %in% row.names(varPart)]
plotPercentBars(varPart[genes[order(-varPart[genes, 'regime'])], c('regime', 'treatment', 'population', 'Residuals')])
z2 <- topTable(fitmm, coef='regime', number=length(fitmm$F))
z2 <- z2[row.names(z2) %in% genes,] # still ordered by p-value!
reactable(z2,
columns = list(
logFC = colDef(format=colFormat(digits=3)),
AveExpr = colDef(format=colFormat(digits=3)),
t = colDef(format=colFormat(digits=3)),
P.Value = colDef(format=colFormat(digits=3)),
adj.P.Val = colDef (format=colFormat(digits=3)),
z.std = colDef(format=colFormat(digits=3))
))
#z2 <- z2[z2$adj.P.Val <= 0.11,] # only significantly regulated genes
z2 <- z2[order(z2$logFC),]
z <- as.data.frame(vobjDream$E) # this is the whole expression matrix
z <- z[row.names(z2),]
z$gene <- factor(row.names(z), levels=row.names(z2), ordered=TRUE)
GE <- pivot_longer(z, cols=c(1:12), names_to='sample', values_to='expression')
GE$regime <- factor('regular', levels=c('regular', 'random'))
GE[GE$sample %in% c("X1A_S8", "X1C_S1", "X2A_S7", "X2C_S5", "X4A_S6", "X4C_S12"), 'regime'] <- 'random'
ggplot(data=GE, mapping=aes(x=regime, y=expression, color=regime)) +
geom_boxplot() + facet_wrap(~gene) + ggtitle(Term(GOTERM[goterm]))